Read in the data
library (readr)
# Locations of the four raw CSV files hosted on GitHub.
urlfile1 <- "https://raw.githubusercontent.com/jsegich/STA553/main/data/income_per_person.csv"
urlfile2 <- "https://raw.githubusercontent.com/jsegich/STA553/main/data/life_expectancy_years.csv"
urlfile3 <- "https://raw.githubusercontent.com/jsegich/STA553/main/data/population_total.csv"
urlfile4 <- "https://raw.githubusercontent.com/jsegich/STA553/main/data/countries_total.csv"

# Read each file through a URL connection; every result is a tibble.
income_per_person     <- read_csv(file = url(urlfile1))
life_expectancy_years <- read_csv(file = url(urlfile2))
population_total      <- read_csv(file = url(urlfile3))
countries_total       <- read_csv(file = url(urlfile4))
Prepare a single data set based on data set 12
# Reshape each wide table (one column per year) into long format:
# one row per (geo, Year) pair. The `geo` column is excluded from the
# stacking so it stays as an identifier, and `na.rm = FALSE` keeps the
# rows whose value is missing.

# Income: the former column names go to `Year`, the cell values to `Income`.
income_per_person_fin <- gather(
  income_per_person,
  key = "Year",
  value = "Income",
  -geo,
  na.rm = FALSE
)

# Life expectancy: the cell values go to `Life Expectancy`.
life_expectancy_years_fin <- gather(
  life_expectancy_years,
  key = "Year",
  value = "Life Expectancy",
  -geo,
  na.rm = FALSE
)

# Population: the cell values go to `Population`.
population_total_fin <- gather(
  population_total,
  key = "Year",
  value = "Population",
  -geo,
  na.rm = FALSE
)
# Join life expectancy and income per person on country and year.
# The keys are spelled out so the join does not depend on (or announce)
# whichever columns happen to be shared.
LifeExpIncom <- full_join(
  life_expectancy_years_fin,
  income_per_person_fin,
  by = c("geo", "Year")
)

# Keep only the first four characters of the year label (a 4-digit year)
# and store it in a lower-case `year` column.
LifeExpIncom <- LifeExpIncom %>%
  mutate(year = substr(Year, 1, 4)) %>%
  select(-Year)

# Merge income and life expectancy with the country information.
# NOTE: all = TRUE makes this a full outer join despite the object's name.
innerjoin <- merge(
  x = LifeExpIncom,
  y = countries_total,
  by.x = "geo",
  by.y = "name",
  all = TRUE
)

## Final data set (natural join on every column the two tables share)
Final_Set <- full_join(LifeExpIncom, innerjoin)

# Need a set that includes population information and region info.
# Rename Year so that it matches `year` in the other set.
population_total_fin <- rename(population_total_fin, year = "Year")

# Attach the country information to the population table (full outer join).
innerjoin2 <- merge(
  x = population_total_fin,
  y = countries_total,
  by.x = "geo",
  by.y = "name",
  all = TRUE
)

# Combine everything. `by.x`, `by.y` and `all` are merge() arguments, not
# full_join() arguments, so they are dropped here (older dplyr silently
# ignored them; recent releases are understood to reject unknown arguments).
# The join keys are every column the two tables have in common, which is
# what the natural join did before.
Fin <- full_join(
  x = Final_Set,
  y = innerjoin2,
  by = intersect(names(Final_Set), names(innerjoin2))
)

# Subset the data to only include 2015.
Sub_2015 <- subset(Final_Set, year == "2015")
Sub_2015.population <- subset(population_total_fin, year == "2015")

# Give the life-expectancy column a syntactic name.
# `Fin` was built before this rename and keeps the original `Life Expectancy`.
Sub_2015 <- rename(Sub_2015, Life.Expectancy = "Life Expectancy")
Final_Set <- rename(Final_Set, Life.Expectancy = "Life Expectancy")

# Add the population total: inner join on country, so only countries present
# in both 2015 subsets are kept.
Final_Sub_2015 <- merge(
  x = Sub_2015,
  y = Sub_2015.population,
  by = "geo",
  all = FALSE
)
Here we create a scatter plot for data from 2015
# Bubble chart for 2015: life expectancy (x) against income (y), one marker
# per country. Marker size follows the square root of the population and
# colour identifies the country. Columns are referenced through formulas so
# they are looked up in the data passed to plot_ly().
fig <- Final_Sub_2015 %>%
  plot_ly(
    type = "scatter",
    mode = "markers",
    alpha = 0.5,
    x = ~Life.Expectancy,
    y = ~Income,
    size = ~sqrt(Population),
    sizes = c(5, 20),
    color = ~geo,
    # Hover text shown for each marker.
    text = ~paste('Population:', Population, '<br>Country:', geo),
    marker = list(
      symbol = "circle",
      sizemode = "diameter",
      line = list(width = 2)
    )
  )

# Store the result of layout(): without the assignment the title and axis
# labels are lost when `fig` is printed below.
fig <- fig %>%
  layout(
    title = list(text = "Life Expectancy vs. Income"),
    xaxis = list(title = list(text = "Life Expectancy")),
    yaxis = list(title = list(text = "Income in USD"))
  )

fig
Here we create an animated plot
# Subset to only include variables of interest.
# `year` is stored as text; convert it to a number for the time transition.
Finy <- mutate(Fin, Num_year = as.numeric(year))
Fin <- Finy[, c("Life Expectancy", "Income", "region", "Population", "Num_year")]

# Omit rows with any missing value.
Fin <- na.omit(Fin)

# Columns are referenced by name inside aes() and transition_time() so they
# are taken from the plot data rather than from the global `Fin` object.
# The `ids` mapping was dropped: it is not a ggplot2 aesthetic and only
# produced an "unknown aesthetics" warning.
p <- ggplot(
  data = Fin,
  aes(
    x = `Life Expectancy`,
    y = Income,
    size = Population,
    colour = region
  )
) +
  geom_point(show.legend = TRUE, alpha = 0.4) +
  scale_size(range = c(2, 12)) +
  scale_y_log10() +
  labs(
    x = "Life Expectancy",
    y = "Income in USD",
    title = " Life Expectancy vs. Income"
  ) +
  ## gganimate command: animate over the numeric year
  transition_time(Num_year)

## Render the animation as a GIF
animate(p, renderer = gifski_renderer())
Read in the data
# Location of the gas station (POC) data set; note this reuses `urlfile1`.
urlfile1 <- "https://raw.githubusercontent.com/jsegich/STA553/main/data/POC.csv"

# Read the CSV through a URL connection and preview the first six rows.
gas_stations <- read_csv(file = url(urlfile1))
head(gas_stations, n = 6)
## # A tibble: 6 x 32
## X1 site_row_id STATE county ADDRESS CITY ycoord xcoord SITE_DESCRIPTION
## <dbl> <chr> <chr> <chr> <chr> <chr> <dbl> <dbl> <chr>
## 1 1 1-3R8J-494 CA Los An… 37120 4… PALM… 34.6 -118. Los Angeles-Long…
## 2 2 1-3R8J-362 WA Frankl… 1212 N … PASCO 46.2 -119. Kennewick-Pasco-…
## 3 3 1-3R8J-199 NV Washoe 99 DAMO… RENO 39.4 -120. Reno-Sparks NV
## 4 4 1-3R8J-261 UT Salt L… 5404 S … SALT… 40.7 -112. Salt Lake City UT
## 5 5 1-3R8J-493 CA Los An… 1731 E … LANC… 34.7 -118. Los Angeles-Long…
## 6 6 1-3R8J-508 WA Benton 2707 S … KENN… 46.2 -119. Kennewick-Pasco-…
## # … with 23 more variables: service_or_fuel <chr>, diesel <chr>,
## # twentyfour_hour_flag <chr>, car_wash <chr>, truckstop_flag <chr>,
## # description <chr>, PUMP_TECH <chr>, POC <dbl>, HIFCA <dbl>, ZIPnew <dbl>,
## # POCAGE <dbl>, POCGAP <dbl>, ZIPPOC <dbl>, HFG <dbl>, MSA <dbl>,
## # dist.to.poc <dbl>, cate.poc.density <chr>, cate.poc.age <chr>,
## # cate.poc.age.20 <chr>, cate.poc.intensity <chr>,
## # cate.poc.intensity.tot <chr>, MSA_POC <dbl>, MSA_POC.1 <dbl>
Create a random sample of 500 gas stations and provide the requested information for each
library(leaflet)
# Draw 500 stations at random (no seed is set, so the sample differs
# between runs) and display the structure of the sampled table.
gas_sub <- sample_n(tbl = gas_stations, size = 500)
str(gas_sub)
## spec_tbl_df [500 × 32] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ X1 : num [1:500] 53030 4730 57608 1890 32306 ...
## $ site_row_id : chr [1:500] "1-1APKRO5" "1-40CZ-4579" "1-1FE3H5W" "1-3UVO-3853" ...
## $ STATE : chr [1:500] "GA" "CO" "IN" "OH" ...
## $ county : chr [1:500] "Henry" "Douglas" "Putnam" "Franklin" ...
## $ ADDRESS : chr [1:500] "6105 HWY 155 N" "830 WILCOX ST" "1207 S BLOOMINGTON ST" "215 W BRIDGE ST" ...
## $ CITY : chr [1:500] "STOCKBRIDGE" "CASTLE ROCK" "GREENCASTLE" "DUBLIN" ...
## $ ycoord : num [1:500] 33.6 39.4 39.6 40.1 32.8 ...
## $ xcoord : num [1:500] -84.2 -104.9 -86.9 -83.1 -97.3 ...
## $ SITE_DESCRIPTION : chr [1:500] "Atlanta-Sandy Springs-Marietta GA" "Denver-Aurora-Broomfield CO" "RURAL" "Columbus OH" ...
## $ service_or_fuel : chr [1:500] "Fuel" "Fuel" "Fuel" "Fuel" ...
## $ diesel : chr [1:500] "N" "Y" "Y" "N" ...
## $ twentyfour_hour_flag : chr [1:500] "Y" "N" "Y" "N" ...
## $ car_wash : chr [1:500] "N" "N" "N" "N" ...
## $ truckstop_flag : chr [1:500] "Y" "N" "N" "N" ...
## $ description : chr [1:500] "URBAN" "URBAN" "RURAL" "URBAN" ...
## $ PUMP_TECH : chr [1:500] "C" "O" "O" "O" ...
## $ POC : num [1:500] 0 0 0 0 0 0 0 0 0 0 ...
## $ HIFCA : num [1:500] 0 1 0 1 1 1 0 0 0 0 ...
## $ ZIPnew : num [1:500] 30281 80104 46135 43017 76137 ...
## $ POCAGE : num [1:500] NA NA NA NA NA NA NA NA NA NA ...
## $ POCGAP : num [1:500] NA NA NA NA NA NA NA NA NA NA ...
## $ ZIPPOC : num [1:500] 2 0 0 0 1 0 0 0 0 0 ...
## $ HFG : num [1:500] 0 0 0 0 0 0 0 0 0 0 ...
## $ MSA : num [1:500] 520 2080 0 1840 2800 4760 0 6160 5720 2440 ...
## $ dist.to.poc : num [1:500] 5.361 34.094 39.171 9.112 0.233 ...
## $ cate.poc.density : chr [1:500] "(5,Inf]" "(-1e-06,1]" "(-1e-06,1]" "(1,5]" ...
## $ cate.poc.age : chr [1:500] "(15,140]" "(140,Inf]" "(140,Inf]" "(0,15]" ...
## $ cate.poc.age.20 : chr [1:500] "(15,140]" "(140,Inf]" "(140,Inf]" "(0,15]" ...
## $ cate.poc.intensity : chr [1:500] "(5,Inf]" "(-0.0001,0]" "(-0.0001,0]" "(0,5]" ...
## $ cate.poc.intensity.tot: chr [1:500] "(8,Inf]" "(-0.0001,0]" "(-0.0001,0]" "(0,8]" ...
## $ MSA_POC : num [1:500] 1 0 0 0 1 0 0 0 0 0 ...
## $ MSA_POC.1 : num [1:500] 1 0 0 0 1 0 0 0 0 0 ...
## - attr(*, "spec")=
## .. cols(
## .. X1 = col_double(),
## .. site_row_id = col_character(),
## .. STATE = col_character(),
## .. county = col_character(),
## .. ADDRESS = col_character(),
## .. CITY = col_character(),
## .. ycoord = col_double(),
## .. xcoord = col_double(),
## .. SITE_DESCRIPTION = col_character(),
## .. service_or_fuel = col_character(),
## .. diesel = col_character(),
## .. twentyfour_hour_flag = col_character(),
## .. car_wash = col_character(),
## .. truckstop_flag = col_character(),
## .. description = col_character(),
## .. PUMP_TECH = col_character(),
## .. POC = col_double(),
## .. HIFCA = col_double(),
## .. ZIPnew = col_double(),
## .. POCAGE = col_double(),
## .. POCGAP = col_double(),
## .. ZIPPOC = col_double(),
## .. HFG = col_double(),
## .. MSA = col_double(),
## .. dist.to.poc = col_double(),
## .. cate.poc.density = col_character(),
## .. cate.poc.age = col_character(),
## .. cate.poc.age.20 = col_character(),
## .. cate.poc.intensity = col_character(),
## .. cate.poc.intensity.tot = col_character(),
## .. MSA_POC = col_double(),
## .. MSA_POC.1 = col_double()
## .. )
# One label per sampled station: state, zip code and county.
label.msg <- paste(
  "State:", gas_sub$STATE,
  "Zip:", gas_sub$ZIPnew,
  "\n County:", gas_sub$county,
  "\n"
)

# Custom 60 x 60 marker icon loaded from the course repository.
redicon <- makeIcon(
  iconUrl = "https://raw.githubusercontent.com/jsegich/STA553/main/data/0f61ba72e0e12ba59d30a50295964871.png?raw=true",
  iconWidth = 60,
  iconHeight = 60
)

# Define a leaflet map centred on the mean coordinates of the sample,
# with one labelled marker per station.
centre_lng <- mean(gas_sub$xcoord)
centre_lat <- mean(gas_sub$ycoord)

map12 <- leaflet(gas_sub) %>%
  addTiles() %>%
  setView(lng = centre_lng, lat = centre_lat, zoom = 4) %>%
  addMarkers(
    lng = ~xcoord,
    lat = ~ycoord,
    label = ~label.msg,
    icon = redicon
  )

map12